#!/usr/bin/python

import matplotlib.pyplot as plt
from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture

features_train, labels_train, features_test, labels_test = makeTerrainData()


### The training data (features_train, labels_train) have both "fast" and "slow"
### points mixed together--separate them so we can give them different colors
### in the scatterplot and identify them visually.
### Each training point is indexed as [grade, bumpiness]; label 0 is treated as
### "fast" and label 1 as "slow". Points and labels are walked in lockstep
### rather than by index.
grade_fast = [point[0]
              for point, label in zip(features_train, labels_train)
              if label == 0]
bumpy_fast = [point[1]
              for point, label in zip(features_train, labels_train)
              if label == 0]
grade_slow = [point[0]
              for point, label in zip(features_train, labels_train)
              if label == 1]
bumpy_slow = [point[1]
              for point, label in zip(features_train, labels_train)
              if label == 1]


#### initial visualization
#### (disabled: the snippet sits inside a string literal, so it never runs;
#### remove the surrounding triple quotes to enable it)
#### Both classes are drawn with bumpiness on the x-axis and grade on the
#### y-axis so that they agree with the axis labels below.
'''
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.scatter(bumpy_fast, grade_fast, color = "b", label="fast")
plt.scatter(bumpy_slow, grade_slow, color = "r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()
'''
################################################################################


### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary

### Exactly one classifier is trained per run. Set USE_ADABOOST to False to
### train the k-nearest-neighbours model instead of the boosted stumps.
### Either branch leaves an unfitted model in `clf`; fitting and prediction
### happen once, below, so `clf` and `pred` are always defined afterwards.
USE_ADABOOST = True

if USE_ADABOOST:
    # Imports stay local so only the selected model's modules are loaded.
    from sklearn.ensemble import AdaBoostClassifier as ABC
    from sklearn.tree import DecisionTreeClassifier as DTC

    n_estimators = 40
    learning_rate = 1.0

    # Depth-1 decision tree used as the weak learner that gets boosted.
    dt_stump = DTC(max_depth=1, min_samples_leaf=1)

    # The weak learner is passed positionally on purpose: its keyword was
    # renamed from `base_estimator` to `estimator` across scikit-learn
    # releases, while it has stayed the first positional parameter.
    # NOTE(review): newer scikit-learn releases also deprecate the
    # `algorithm` argument -- confirm against the installed version.
    clf = ABC(dt_stump,
              learning_rate=learning_rate,
              n_estimators=n_estimators,
              algorithm="SAMME")
else:
    from sklearn.neighbors import KNeighborsClassifier as KNC

    n_neighbors = 15
    weights = 'uniform'
    clf = KNC(n_neighbors=n_neighbors, weights=weights)

# Train on the training split, then predict labels for the held-out points.
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)

from sklearn.metrics import accuracy_score

# Score the predictions against the true test labels, then report the
# score rounded to three decimal places.
accuracy = accuracy_score(labels_test, pred)
rounded_accuracy = round(accuracy, 3)

print("Accuracy is %s" % rounded_accuracy)


# Hand the classifier and the test split to prettyPicture (imported from
# class_vis), but only when a classifier named `clf` exists. Checking for the
# name explicitly -- instead of wrapping the call in `except NameError` --
# keeps a NameError raised inside prettyPicture itself from being hidden.
if "clf" in globals():
    prettyPicture(clf, features_test, labels_test)
else:
    print("No classifier named clf is defined; skipping prettyPicture")
